* ---- Session setup ------------------------------------------------
* Close any log left open by a previous run; -capture- swallows the
* error raised when no log is open.
capture log close

clear
set more 1
pause off

* Current date ($S_DATE) rendered with the %td_CY-N-D display format and
* kept in a local macro. It is not referenced again in this part of the file.
local date : di %td_CY-N-D date("$S_DATE", "DMY")

* ---- Globals ------------------------------------------------------
* Folder that receives the exported tables.
global dir_work_tabout "../outputs"

* Age that defines the first age group in the summary tables.
global init_age 15


******************************************************************
******************************************************************

* Load the analysis sample and construct the variables used in the tables.
use NLSY97_PublicData_FinalSample, clear

* ---- Earnings (level and log) for out-of-school person-years with
*      positive earnings ---------------------------------------------
gen earnings_de0 = (earnings) if enroll == 0 & earnings > 0
label var earnings_de0 "Earnings after leaving school"

gen logearnings_de0 = log(earnings) if enroll == 0 & earnings > 0
label var logearnings_de0 "Log earnings"

* ---- Running sum of dq within person, and its one-period lag -------
* NOTE(review): the L. operator requires the panel to be declared
* (xtset/tsset). No declaration is visible here, so it is presumably
* stored with the dataset -- confirm.
sort id age
by id: gen accdq = sum(dq)
gen lagaccdq = l.accdq

* ---- Smoking history indicators ------------------------------------
* !missing() is used instead of "!= ." so that extended missing values
* (.a-.z) are also excluded rather than being coded as 0 or 1.
gen neversmoker = (addiction == 0) if !missing(addiction)
label var neversmoker "Never smoker"

* Person-level copy of the age-20 never-smoker status: take the value
* observed at age 20 and spread it over all of the person's rows.
gen neversmoker_a20_ = neversmoker if age == 20
by id: egen neversmoker_a20 = max(neversmoker_a20_)
label var neversmoker_a20 "Never smoked before age 20"
drop neversmoker_a20_

* dq restricted to observations with addiction == 0.
gen dq0 = dq if addiction == 0
label var dq0 "Smoking initiation"

* Ever smoker: addiction > 0, or lagged cumulative dq above 1.
* NOTE(review): the threshold is "> 1", not ">= 1" -- confirm intended.
gen eversmoker = (addiction > 0) if !missing(addiction)
replace eversmoker = 1 if lagaccdq > 1 & !missing(lagaccdq)
label var eversmoker "Smoked Regularly as of t-1"

gen addiction_dq1 = addiction if (addiction) > 0
label var addiction_dq1 "Years smoked as of t-1 (among smokers)"

* ---- Education -----------------------------------------------------
* Without the missing() guard a missing hgc would satisfy hgc >= 13,
* because Stata treats missing values as larger than any number.
gen hgc13more = (hgc >= 13) if !missing(hgc)
label var hgc13more "College_and_more"

* ---- Net worth in thousands ----------------------------------------
replace net_worth = net_worth/1000

* ---- Labels for variables that already exist in the dataset --------
* Each variable is labelled once; these are the labels that were in
* effect at the end of the original labelling sequence.
*gen dedk = enroll * work_part
* dedk is not generated here (the line above is disabled), so it must
* already be present in the loaded dataset.
label var dq          "Smoking"
label var addiction   "Years smoked as of t-1"
label var enroll      "School enrollment"
label var dedk        "Working part-time while in school"
label var net_worth   "Net worth (in thousands)"
label var parents_clg "Parents are 4-year college graduates"
label var hgc         "Years of schooling"

 
******************************************************************
* Summary tables
******************************************************************


************ Main variables over age groups

* Table 1, panel C: means of the key variables at four selected ages.

* Every observation must carry an age before age groups are assigned.
assert age != .

* Age groups: 1 = $init_age, 2 = 20, 3 = 25, 4 = 30. All other ages stay
* missing and are therefore left out of the by-group statistics.
gen group_age = .
replace group_age = 1 if age == $init_age
replace group_age = 2 if age == 20
replace group_age = 3 if age == 25
replace group_age = 4 if age == 30

cap matrix drop mat_out11

* -save- stores one row vector of means per group in r(Stat1)...r(Stat4).
tabstat addiction neversmoker enroll dedk hgc earnings_de0, by(group_age) stat(mean) save

* Transpose each group's means into a column and append the columns
* left to right, giving a (variables x age groups) matrix.
forvalues g = 1/4 {
	matrix mat1 = r(Stat`g')'
	matrix mat_out11 = [nullmat(mat_out11), mat1]
}

*matrix mat_out11 = [nullmat(mat_out11), r(StatTotal)']

* The first column name follows $init_age so that it stays in step with
* the definition of age group 1 above.
matrix colnames mat_out11 = "Age_$init_age" "Age_20" "Age_25" "Age_30"

outtable using $dir_work_tabout/Table_01_panelC, mat(mat_out11) label nobox center caption("Key Variables over Age ") f(%100.2f %100.2f  %100.2f %100.2f %100.2f) clabel(tab:nlsy97:nlsy:sumstat:main) replace

matrix drop mat_out11


************ Age-30 education by smoking history *************

* Table 1, panel B: age-30 schooling compared across the two values of
* neversmoker_a20. The data are restricted to age 30 inside
* preserve/restore so the full sample is back in memory afterwards.
preserve

keep if age ==30

capture matrix drop mat_out0

* One row per outcome: mean of group 1, mean of group 2, their
* difference (group 2 minus group 1) and the p-value returned by ttest.
foreach outcome of varlist hgc hgc13more {

	ttest `outcome', by(neversmoker_a20)
	matrix T = (r(mu_1), r(mu_2), r(mu_2)-r(mu_1), r(p))

	* Stack this outcome's row underneath the rows collected so far.
	matrix mat_out0 = [nullmat(mat_out0) \ T]
}

matrix colnames mat_out0 = "Smoked_before_age20""Never_smoked_before_age20""Diff""p-value_of_Diff"
matrix rownames mat_out0 = "Years_of_schooling_(age_30)" "College_and_more_(age_30)"

outtable using $dir_work_tabout/Table_01_panelB,  mat(mat_out0) nobox center caption("Age-30 education by youth smoking history") f(%10.2f %10.2f %10.2f %10.2f) clabel(tab:educ:BY:dq) replace

restore


************ Smoking status by age-30 education *************

* Table 1, panel A: smoking measures at age 30 compared across the two
* values of hgc13more. The data are restricted to age 30 inside
* preserve/restore so the full sample is back in memory afterwards.
preserve

keep if age == 30

* The two -replace net_worth_nlsy- statements that used to follow were
* removed: after the age-30 restriction neither of their conditions can
* match, and -restore- would discard any change anyway.

capture matrix drop mat_out0

* One row per smoking measure: mean of group 1, mean of group 2, their
* difference (group 2 minus group 1) and the p-value returned by ttest.
foreach var of varlist dq addiction neversmoker {

	mat T = J(1,4,.)
	ttest `var', by(hgc13more)
	mat T[1,1] = r(mu_1)
	mat T[1,2] = r(mu_2)
	mat T[1,3] = r(mu_2)-r(mu_1)
	mat T[1,4] = r(p)

	* Stack this measure's row underneath the rows collected so far.
	matrix mat_out0 = [nullmat(mat_out0) \ T]
}

* Quoted names are separated by spaces so each is clearly its own token.
matrix colnames mat_out0 = "High_school_or_less" "College_and_more" "Diff" "p-value_of_Diff"
matrix rownames mat_out0 = "Smoking" "Years_smoked" "Never_smoked"

outtable using $dir_work_tabout/Table_01_panelA,  mat(mat_out0) nobox center caption("Youth smoking by education (age 30)") f(%10.2f %10.2f %10.2f %10.2f) clabel(tab:dq:BY:educ) replace


restore

